metadata = read_tsv(here('data', 'metadata_merged.tsv'), col_types = cols()) %>%
         condition = str_c(CornVariety, FungalStrain, TissueExtraction, sep='_')) %>%
  rename(Corn_Genotype = CornVariety, Fungal_Treatment = FungalStrain, Tissue_Extraction = TissueExtraction)
get_counts = function(.) otu_table(.) %>% as.data.frame() %>% rownames_to_column('feature_id') %>% as_tibble()
get_tax = function(.) tax_table(.) %>% data.frame() %>% rownames_to_column('feature_id') %>% as_tibble()
get_qza = function(filepath){read_qza(filepath)$data %>% as.data.frame() %>% rownames_to_column('SampleID')}
  mutate(SampleID = str_replace(SampleID, '(.*?)-(.*)', '\\2-\\1'),
         SampleID = str_replace_all(SampleID, c('-B73'='-373', '-C322'='-322')))
change_16S_ids_to_match_ITS = . %>% 
  mutate(SampleID = str_replace(SampleID, '(.*?)-(.*)', '\\2-\\1'),
ps = list('16S'= qza_to_phyloseq(features = here('data', '16S', 'table_wo_outliers.qza'),
                          tree=here('data', '16S', 'rooted-tree.qza'),
                          taxonomy=here('data', '16S', 'merged-taxonomy.qza'),
                          metadata=here('data', '16S', 'metadata_filtered.tsv'),
                          tmp='C:/Users/brian.mack/Downloads/tmp') %>% 
            subset_samples(CornVariety != 'dummy' & Timepoint == '2' & TissueType == 'Ovule') %>%
            prune_taxa(taxa_sums(.) > 0, .),
          'ITS' = qza_to_phyloseq(features = here('data', 'ITS', 'table-no-mitochondria-no-chloroplast.qza'),
                          tree=here('data', 'ITS', 'rooted-tree.qza'),
                          taxonomy=here('data', 'ITS', 'merged-taxonomy.qza'),
                          metadata=here('data', 'ITS', 'metadata_merged.tsv'),
                          tmp='C:/Users/brian.mack/Downloads/tmp')  %>% 
            subset_samples(CornVariety  != 'dummy' & Timepoint == '2' & TissueType == 'Ovule') %>%
            prune_taxa(taxa_sums(.) > 0, .)
            )
ps = map(ps, function(.x){
  sample_data(.x) = get_sample_data(.x) %>% 
    rename(TimePoint = Timepoint) %>%
    mutate(TimePoint = str_replace(TimePoint, '^', 'T'),
           condition = str_c(CornVariety, FungalStrain, TissueExtraction, sep='_')) %>%
    rename(Corn_Genotype = CornVariety, Fungal_Treatment = FungalStrain, Tissue_Extraction = TissueExtraction) %>%
    group_by(condition) %>%
    mutate(biological_rep = row_number()) %>%
    ungroup() %>%
    mutate(condition_w_rep = str_c(condition, biological_rep, sep='_')) %>%
    column_to_rownames('SampleID')
  return(.x)})
sample_data(ps$ITS) = get_sample_data(ps$ITS) %>%
  change_its_ids_to_match_16S() %>%
  column_to_rownames('SampleID')
ps %>% imap(~get_counts(.x) %>% 
              left_join(get_tax(.x), by='feature_id') %>%
              rename(all_of(get_sample_data(.x) %>% pull(SampleID, name=condition_w_rep))) %>%
              write_csv(here(str_glue('output/raw_counts_and_taxonomy_{.y}.csv')))
              )

Removed taxa not assigned to a phylum. After removing these taxa, 1 16S samples was removed due to having total counts <= 1500.

ps_phylum_filt = list('16S' = subset_taxa(ps$`16S`, !is.na(Phylum) & 
                        prune_samples(sample_sums(.) >= 1500, .) %>% prune_taxa(taxa_sums(.) > 0, .),
                      'ITS' = subset_taxa(ps$ITS, !is.na(Phylum) & 
                               !Phylum %in% c("", 'uncharacterized', 'unidentified')) %>% 
                        prune_samples(sample_sums(.) >= 1500, .) %>% prune_taxa(taxa_sums(.) > 0, .))
ps_phylum_filt_tax = ps_phylum_filt %>% map(get_tax)
ps_phylum_filt_counts = ps_phylum_filt %>% map(get_counts)
ps_phylum_filt_ra = map(ps_phylum_filt, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_phylum_filt
phyloseq-class experiment-level object
otu_table()   OTU Table:          [ 946 taxa and 48 samples ]:
sample_data() Sample Data:        [ 48 samples by 10 sample variables ]:
tax_table()   Taxonomy Table:     [ 946 taxa by 7 taxonomic ranks ]:
phy_tree()    Phylogenetic Tree:  [ 946 tips and 919 internal nodes ]:
taxa are rows
sample_data() Sample Data:        [ 50 samples by 9 sample variables ]:tax_table()   Taxonomy Table:     [ 246 taxa by 7 taxonomic ranks ]:
ps_phylum_filt %>% imap(function(ps_phylum_filt, seq_type){
  ps_phylum_filt %>% get_tax() %>%
    summarize(across(-feature_id, ~sum(!is.na(.x))/ n())) %>%
    round(2) %>%
    pivot_longer(everything(), names_to = 'Taxonomic rank', values_to = seq_type)
  }) %>%
  reduce(full_join, by='Taxonomic rank') %>%
  kbl(format = 'html', caption='Fraction of ASVs classified at each rank') %>%
  kable_classic(full_width = F, html_font = "Times New Roman") 
Fraction of ASVs classified at each rank
Taxonomic rank 16S ITS
Kingdom 1.00 1.00
Phylum 1.00 1.00
Class 1.00 0.99
Order 1.00 0.93
Family 0.99 0.89
Genus 0.91 0.86
Species 0.28 0.73
ps_phylum_filt %>% imap(function(ps_phylum_filt, seq_type){
  ps_phylum_filt %>% sample_sums() %>% enframe(name = 'SampleID', value = 'total_counts') %>%
    left_join(metadata, by='SampleID') %>%
    mutate(seq_type = seq_type)
  }) %>%
  bind_rows() %>%
  ggplot(aes(x=condition, y=total_counts)) +
  geom_boxplot(outlier.shape = NA) +
  ggbeeswarm::geom_quasirandom(alpha = 0.3, width=0.2, groupOnX=TRUE) +
  facet_wrap(~seq_type, ncol=1, scales = 'free') +
  scale_y_continuous(labels = scales::comma) +
  labs(title='Total number of counts for each sample') +
  ggeasy::easy_rotate_x_labels() +
  ggeasy::easy_center_title()




Rarefying to 1569 counts.

ps_phylum_filt_rarefied =  ps_phylum_filt %>% map(
  ~rarefy_even_depth(.x, sample.size = 1569, rngseed=1100 , replace=FALSE) %>%
     prune_taxa(taxa_sums(.) > 0, .))
ps_phylum_filt_rarefied
$`16S`

otu_table()   OTU Table:          [ 579 taxa and 48 samples ]:
sample_data() Sample Data:        [ 48 samples by 10 sample variables ]:




phyloseq-class experiment-level object
sample_data() Sample Data:        [ 50 samples by 9 sample variables ]:
phy_tree()    Phylogenetic Tree:  [ 111 tips and 109 internal nodes ]:taxa are rows







prev = map(ps_phylum_filt_ra,
            function(ps_phylum_filt_ra){
              relative_counts = ps_phylum_filt_ra %>% get_counts()
              tax = ps_phylum_filt_ra %>% get_tax()
              relative_counts %>% 
                reframe(feature_id = feature_id,
                         prevalence = rowSums(.> 0) -1, #subtracted 1 because feature_id column is always counted
                         total_relative_abundance = taxa_sums(ps_phylum_filt_ra), 
                         Phylum = tax$Phylum,
                         Genus = tax$Genus,
prev_plots = prev %>% imap(function(prev, seq_type){
    mutate(prevalence = prevalence / nsamples(ps_phylum_filt[[seq_type]])) %>%
  ggplot(aes(total_relative_abundance, prevalence, color=Phylum, text=glue('Genus: {Genus}<br>Species: {Species}'))) +
    geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) + 
    scale_x_log10() + xlab("Total Relative Abundance") + ylab("Prevalence [Fraction Samples]") +
    facet_wrap(~Phylum) +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(legend.position="none")})
prev_plots$`16S` %>% plotly::ggplotly()
prev_plots$ITS %>% plotly::ggplotly()

Predominant phyla

ps_phyla = map(ps_phylum_filt, 
               function(ps_phylum_filt){
                 ps_phyla = tax_glom(ps_phylum_filt, "Phylum", NArm = TRUE)
                 prevalenceThreshold =  0.05 * nsamples(ps_phyla)
                   get_counts() %>% 
                   summarise(feature_id = feature_id,
                             prevalence = rowSums(.> 0),
                   prune_taxa(., ps_phyla) %>%
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)
                 sample_data(ps_phyla) = sample_data(ps_phyla) %>% 
                   as('data.frame') %>%
                   mutate(TissueType = fct_relevel(TissueType, 'Ovule'))
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
ps_phyla_ra = map(ps_phyla, ~transform_sample_counts(.x, function(x){x / sum(x)}))
prev_corn_phyla = map(ps_phyla_ra,
            relative_counts = ps_phyla_ra %>% get_counts()
              tax = ps_phyla_ra %>% get_tax()
              relative_counts %>% 
                pivot_longer(-feature_id, names_to = 'SampleID', values_to = 'counts') %>%
                left_join(metadata, by='SampleID') %>%
                left_join(tax, by='feature_id') %>%
                group_by(Phylum) %>%
                mutate(prevalence_entire_dataset = sum(counts > 0) / n(),
                       mean_relative_abundance_entire_dataset = mean(counts)) %>%
                summarize(prevalence = sum(counts > 0) / n(),
                          prevalence_entire_dataset = first(prevalence_entire_dataset),
                          mean_relative_abundance_entire_dataset = first(mean_relative_abundance_entire_dataset)) %>%
                ungroup()})
prev_corn_phyla$`16S` %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Phylum, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
Phylum Total Relative Abundance Relative abundance B73 Relative abundance CML322
Proteobacteria 0.866 0.829 0.899
Firmicutes 0.065 0.118 0.016
Actinobacteriota 0.038 0.041 0.035
Bacteroidota 0.030 0.010 0.050
prev_corn_phyla$ITS %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Phylum, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  #filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Phylum', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
Phylum Total Relative Abundance Relative abundance B73 Relative abundance CML322
Ascomycota 0.999 0.999 0.999
Basidiomycota 0.001 0.001 0.001




Removed taxa that are present in less than 5% of samples for ASV level dataset. This will be used for differential abundance testing at ASV level.

ps_prevf = map2(ps_phylum_filt, prev,
               function(ps_phylum_filt, prev){
                 keepTaxa = filter(prev, prevalence >= prevalenceThreshold) %>% 
                   pull(feature_id)
                 prune_taxa(keepTaxa, ps_phylum_filt) %>%
                   prune_taxa(taxa_sums(.) > 0, .)})
ps_prevf_ra = map(ps_prevf, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_prevf_clr = map(ps_prevf, microbiome::transform, 'clr')
ps_prevf_alr = map(ps_prevf, microbiome::transform, 'alr', shift=1)
ps_prevf
$`16S`
phyloseq-class experiment-level object
otu_table()   OTU Table:          [ 287 taxa and 48 samples ]:
sample_data() Sample Data:        [ 48 samples by 10 sample variables ]:
tax_table()   Taxonomy Table:     [ 287 taxa by 7 taxonomic ranks ]:phy_tree()    Phylogenetic Tree:  [ 287 tips and 278 internal nodes ]:taxa are rows$ITS

otu_table()   OTU Table:          [ 51 taxa and 50 samples ]:sample_data() Sample Data:        [ 50 samples by 9 sample variables ]:
tax_table()   Taxonomy Table:     [ 51 taxa by 7 taxonomic ranks ]:phy_tree()    Phylogenetic Tree:  [ 51 tips and 50 internal nodes ]:taxa are rows




Agglomerated counts at both genus level and species level.

ps_genus = map(ps_phylum_filt, 
               function(ps_phylum_filt){
                 ps_genus = tax_glom(ps_phylum_filt, "Genus", NArm = TRUE)
                 ps_genus = ps_genus %>% 
                   summarise(feature_id = feature_id,
                             prevalence = rowSums(.> 0),
                             TotalAbundance = taxa_sums(ps_genus)) %>%
                   filter(prevalence >= prevalenceThreshold) %>% 
                   prune_taxa(., ps_genus) %>%
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)
                 sample_data(ps_genus) = sample_data(ps_genus) %>% 
                   as('data.frame') %>%
                   mutate(TissueType = fct_relevel(TissueType, 'Ovule'))
                 return(ps_genus)})
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
ps_genus_ra = map(ps_genus, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_genus_clr = map(ps_genus, microbiome::transform, 'clr')
ps_genus_alr = map(ps_genus, microbiome::transform, 'alr', shift=1)
ps_species = map(ps_phylum_filt, 
               function(ps_phylum_filt){
                 ps_species = tax_glom(ps_phylum_filt, "Species", NArm = TRUE)
                 prevalenceThreshold =  0.05 * nsamples(ps_species)
                 ps_species = ps_species %>% 
                   get_counts() %>% 
                   summarise(feature_id = feature_id,
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)
                 sample_data(ps_species) = sample_data(ps_species) %>% 
                   as('data.frame') %>%
                   mutate(TissueType = fct_relevel(TissueType, 'Ovule'))
                 return(ps_species)})
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
ps_species_ra = map(ps_species, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_species_clr = map(ps_species, microbiome::transform, 'clr')
ps_species_alr = map(ps_species, microbiome::transform, 'alr', shift=1)

Most prevalent genera

prev_corn_genus = map(ps_genus_ra,
            function(ps_genus_ra){
            relative_counts = ps_genus_ra %>% get_counts()
              tax = ps_genus_ra %>% get_tax()
              relative_counts %>% 
                pivot_longer(-feature_id, names_to = 'SampleID', values_to = 'counts') %>%
                left_join(tax, by='feature_id') %>%
                       mean_relative_abundance_entire_dataset = mean(counts)) %>%
                group_by(Genus, Corn_Genotype) %>%
                summarize(prevalence = sum(counts > 0) / n(),
                          mean_relative_abundance = mean(counts),
                          prevalence_entire_dataset = first(prevalence_entire_dataset),
                          mean_relative_abundance_entire_dataset = first(mean_relative_abundance_entire_dataset)) %>%
                ungroup()})
prev_corn_genus$`16S` %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Genus, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
Genus Total Relative Abundance Relative abundance B73 Relative abundance CML322
Pantoea 0.461 0.419 0.500
Klebsiella 0.094 0.024 0.159
Enterobacter 0.051 0.101 0.005
Carnimonas 0.041 0.085 0.000
Burkholderia-Caballeronia-Paraburkholderia 0.030 0.048 0.013
Serratia 0.029 0.033 0.026
Stenotrophomonas 0.029 0.014 0.043
Lactococcus 0.024 0.048 0.003
Sphingobacterium 0.024 0.007 0.039
Achromobacter 0.023 0.008 0.036
Rosenbergiella 0.022 0.047 0.000
Listeria 0.019 0.040 0.000
Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium 0.017 0.009 0.023
Ochrobactrum 0.017 0.013 0.020
Enterococcus 0.010 0.013 0.007
prev_corn_genus$ITS %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Genus, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  #filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
Genus Total Relative Abundance Relative abundance B73 Relative abundance CML322
Aspergillus 0.552 0.497 0.608
Sarocladium 0.252 0.226 0.279
Meyerozyma 0.124 0.148 0.101
Talaromyces 0.040 0.076 0.003
Trichoderma 0.013 0.026 0.001
Penicillium 0.006 0.011 0.001
Curvularia 0.005 0.007 0.003
Alternaria 0.004 0.007 0.002
Ramichloridium 0.001 0.001 0.001
Acremonium 0.000 0.001 0.000
Bipolaris 0.000 0.000 0.000
Candida 0.000 0.000 0.000
Ceriporia 0.000 0.000 0.000
Cladosporium 0.000 0.000 0.000
Cyphellophora 0.000 0.000 0.000
Exserohilum 0.000 0.000 0.000
Fomes 0.000 0.000 0.000
Fusarium 0.000 0.000 0.000
Lecanicillium 0.000 0.000 0.000
Moesziomyces 0.000 0.000 0.000
Myrothecium 0.000 0.001 0.000
Paraconiothyrium 0.000 0.000 0.000
Peniophora 0.000 0.000 0.000
Peziza 0.000 0.001 0.000
Phanerochaete 0.000 0.000 0.000
Phlebia 0.000 0.000 0.000
Pyricularia 0.000 0.000 0.000
Scopuloides 0.000 0.000 0.000
Stereum 0.000 0.001 0.000
Tinctoporellus 0.000 0.000 0.000
Trametes 0.000 0.000 0.000
unidentified 0.000 0.000 0.000

Most prevalent species

prev_corn_species = map(ps_species_ra,
            function(ps_species_ra){
            relative_counts = ps_species_ra %>% get_counts()
              tax = ps_species_ra %>% get_tax()
              relative_counts %>% 
                pivot_longer(-feature_id, names_to = 'SampleID', values_to = 'counts') %>%
                mutate(prevalence_entire_dataset = sum(counts > 0) / n(),
                       mean_relative_abundance_entire_dataset = mean(counts)) %>%
                group_by(Species, Corn_Genotype) %>%
                summarize(prevalence = sum(counts > 0) / n(),
                          mean_relative_abundance = mean(counts),
                          prevalence_entire_dataset = first(prevalence_entire_dataset),
                          mean_relative_abundance_entire_dataset = first(mean_relative_abundance_entire_dataset)) %>%
                ungroup()})
prev_corn_species$`16S` %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Species, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Species', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
Species Total Relative Abundance Relative abundance B73 Relative abundance CML322
Pantoea_ananatis 0.193 0.207 0.173
Burkholderia_gladioli 0.152 0.195 0.091
Listeria_grayi 0.070 0.119 0.000
Lactococcus_lactis 0.068 0.077 0.056
Sphingobacterium_siyangense 0.050 0.001 0.120
Lactococcus_garvieae 0.043 0.073 0.000
Sphingobacterium_thalpophilum 0.032 0.000 0.079
Sphingomonas_phyllosphaerae 0.030 0.000 0.073
Acinetobacter_baumannii 0.029 0.000 0.070
Pseudomonas_psychrotolerans 0.026 0.008 0.052
Comamonas_sediminis 0.023 0.000 0.056
Corynebacterium_kroppenstedtii 0.021 0.036 0.000
Flavobacterium_anatoliense 0.019 0.000 0.047
Sphingobacterium_multivorum 0.015 0.012 0.019
Staphylococcus_sciuri 0.015 0.026 0.000
Devosia_riboflavina 0.014 0.000 0.035
uncultured_Tistrella 0.013 0.023 0.000
uncultured_bacterium 0.010 0.014 0.003
prev_corn_species$ITS %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Species, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  kbl(format = 'html', col.names = c('Species', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
Species Total Relative Abundance Relative abundance B73 Relative abundance CML322
Aspergillus_flavus 0.567 0.525 0.608
Sarocladium_zeae 0.259 0.234 0.284
Meyerozyma_caribbica 0.135 0.168 0.102
Aspergillus_niger 0.011 0.021 0.000
Talaromyces_purpureogenus 0.010 0.019 0.000
reads = list('16S' = read_qza(here('data', '16S', 'rep-seqs-filtered.qza'))$data,
             'ITS' = read_qza(here('data', 'ITS', 'rep-seqs-filtered.qza'))$data)

Below are barplots of relative taxon abundances for 16S sequencing with samples grouped according to similarity using the neatmap method.

## https://github.com/google/palette.js/blob/79a703df344e3b24380ce1a211a2df7f2d90ca22/palette.js#L802
mpn65 = c('#ff0029','#377eb8','#66a61e','#984ea3','#00d2d5','#ff7f00','#af8d00','#7f80cd','#b3e900','#c42e60','#a65628',
         '#f781bf','#8dd3c7','#bebada','#fb8072','#80b1d3','#fdb462','#fccde5','#bc80bd','#ffed6f','#c4eaff','#cf8c00',
         '#1b9e77','#d95f02','#e7298a','#e6ab02','#a6761d','#0097ff','#00d067','#000000','#252525','#525252','#737373',
         '#969696','#bdbdbd','#f43600','#4ba93b','#5779bb','#927acc','#97ee3f','#bf3947','#9f5b00','#f48758','#8caed6',
         '#cbe8ff','#fecddf','#c27eb6','#8cd2ce','#c4b8d9','#f883b0','#a49100','#f48800','#27d0df','#a04a9b')
map(rank_names(ps_genus$`16S`)[2:6], function(tax_rank){
  df = ps_genus$`16S` %>% 
    speedyseq::mutate_sample_data(condition = condition_w_rep) %>%
    transform(transform = "compositional") %>%
    aggregate_rare(level = tax_rank, detection = 0.05, prevalence = 0.05) 
  scale_fill_manual(values=mpn65) +
  theme(axis.text.x=element_text(angle=90,hjust=0, vjust=0.5)) +
  labs(x = "Sample condition",
       y = "Relative abundance",
       title = glue("16S Relative abundance at {tax_rank} level"), 
       fill = tax_rank)
})
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]
NA







The next two sets are done with ITS counts but made the same way as above.

map(rank_names(ps_genus$ITS)[2:6], function(tax_rank){
    speedyseq::mutate_sample_data(condition = condition_w_rep) %>%
    transform(transform = "compositional") %>%
    aggregate_rare(level = tax_rank, detection = 0.05, prevalence = 0.05) 
  p = plot_composition(df, x.label='condition', otu.sort = 'abundance', sample.sort='neatmap') +
  guides(fill = guide_legend(ncol = 1)) +
  scale_fill_manual(values=mpn65) +
  labs(x = "Sample condition",
       y = "Relative abundance",
       title = glue("ITS Relative abundance at {tax_rank} level"), 
  p %>% plotly::ggplotly()
Warning: stress is (nearly) zero: you may have insufficient data
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]
NA
prune_taxa(names(sort(taxa_sums(ps_genus_ra$`16S`),decreasing = TRUE)[1:30]), ps_genus_ra$`16S`) %>%
  speedyseq::plot_heatmap(method = "NMDS", distance = "bray", sample.label = 'condition', taxa.label='Genus') +
        strip.text.x = element_text(size = 16), plot.title = element_text(size=22)) +
  labs(title = '16S Heatmap of top 30 most abundant genera')







prune_taxa(names(sort(taxa_sums(ps_genus_ra$ITS),decreasing = TRUE)[1:30]), ps_genus_ra$ITS) %>%
  speedyseq::plot_heatmap(method = "NMDS", distance = "bray", sample.label = 'condition', taxa.label='Genus') +
        strip.text.x = element_text(size = 16), plot.title = element_text(size=22)) +
  labs(title = 'ITS Heatmap of top 30 most abundant genera')

save.image(here('src/16_and_ITS_import.RData'))
---
title: "16S and ITS data import"
output:
  html_notebook:
    df_print: paged
    code_folding: hide
  editor_options: 
    chunk_output_type: inline
---

```{r include=FALSE}
library(qiime2R)
library(ggeasy)
library(phyloseq)
library(Biostrings)
library(ggsignif)
library(ggeasy)
library(ggpubr)
library(readxl)
library(patchwork)
library(DESeq2)
library(ALDEx2)
library(microbiome) 
library(vegan)
library(ANCOMBC)
library(glue)
library(ggstatsplot)
library(beeswarm)
library(msa)
library(here)
library(ggtext)
library(kableExtra)
library(tidyHeatmap)
library(SpiecEasi)
library(NetCoMi)
library(igraph)
library(tidygraph)
library(ggraph)
library(tidyverse)
knitr::opts_chunk$set(fig.retina = 1, dpi=450)
options(dplyr.summarise.inform=F)
theme_set(theme_bw())
sessionInfo() %>%
  capture.output(sessionInfo()) %>%
  write_lines(here('output/session_info.txt'))
```

```{r, message = FALSE, warning = FALSE}
metadata = read_tsv(here('data', 'metadata_merged.tsv'), col_types = cols()) %>%
  rename(TimePoint = Timepoint) %>%
  mutate(TimePoint = str_replace(TimePoint, '^', 'T'),
         condition = str_c(CornVariety, FungalStrain, TissueExtraction, sep='_')) %>%
  rename(Corn_Genotype = CornVariety, Fungal_Treatment = FungalStrain, Tissue_Extraction = TissueExtraction)
```

```{r}
get_counts = function(.) otu_table(.) %>% as.data.frame() %>% rownames_to_column('feature_id') %>% as_tibble()
get_tax = function(.) tax_table(.) %>% data.frame() %>% rownames_to_column('feature_id') %>% as_tibble()
get_qza = function(filepath){read_qza(filepath)$data %>% as.data.frame() %>% rownames_to_column('SampleID')}
get_sample_data = function(.) data.frame(sample_data(.)) %>% rownames_to_column('SampleID') %>% as_tibble()
change_its_ids_to_match_16S = . %>% 
  mutate(SampleID = str_replace(SampleID, '(.*?)-(.*)', '\\2-\\1'),
         SampleID = str_replace_all(SampleID, c('-B73'='-373', '-C322'='-322')))
change_16S_ids_to_match_ITS = . %>% 
  mutate(SampleID = str_replace(SampleID, '(.*?)-(.*)', '\\2-\\1'),
         SampleID = str_replace_all(SampleID, c('-373'='-B73', '-322'='-C322')))
```

```{r}
ps = list('16S'= qza_to_phyloseq(features = here('data', '16S', 'table_wo_outliers.qza'),
                          tree=here('data', '16S', 'rooted-tree.qza'),
                          taxonomy=here('data', '16S', 'merged-taxonomy.qza'),
                          metadata=here('data', '16S', 'metadata_filtered.tsv'),
                          tmp='C:/Users/brian.mack/Downloads/tmp') %>% 
            subset_samples(CornVariety != 'dummy' & Timepoint == '2' & TissueType == 'Ovule') %>%
            prune_taxa(taxa_sums(.) > 0, .),
          'ITS' = qza_to_phyloseq(features = here('data', 'ITS', 'table-no-mitochondria-no-chloroplast.qza'),
                          tree=here('data', 'ITS', 'rooted-tree.qza'),
                          taxonomy=here('data', 'ITS', 'merged-taxonomy.qza'),
                          metadata=here('data', 'ITS', 'metadata_merged.tsv'),
                          tmp='C:/Users/brian.mack/Downloads/tmp')  %>% 
            subset_samples(CornVariety  != 'dummy' & Timepoint == '2' & TissueType == 'Ovule') %>%
            prune_taxa(taxa_sums(.) > 0, .)
            )
ps = map(ps, function(.x){
  sample_data(.x) = get_sample_data(.x) %>% 
    rename(TimePoint = Timepoint) %>%
    mutate(TimePoint = str_replace(TimePoint, '^', 'T'),
           condition = str_c(CornVariety, FungalStrain, TissueExtraction, sep='_')) %>%
    rename(Corn_Genotype = CornVariety, Fungal_Treatment = FungalStrain, Tissue_Extraction = TissueExtraction) %>%
    group_by(condition) %>%
    mutate(biological_rep = row_number()) %>%
    ungroup() %>%
    mutate(condition_w_rep = str_c(condition, biological_rep, sep='_')) %>%
    column_to_rownames('SampleID')
  return(.x)})
sample_data(ps$ITS) = get_sample_data(ps$ITS) %>%
  change_its_ids_to_match_16S() %>%
  column_to_rownames('SampleID')
ps %>% imap(~get_counts(.x) %>% 
              left_join(get_tax(.x), by='feature_id') %>%
              rename(all_of(get_sample_data(.x) %>% pull(SampleID, name=condition_w_rep))) %>%
              write_csv(here(str_glue('output/raw_counts_and_taxonomy_{.y}.csv')))
              )
```

Removed taxa not assigned to a phylum. After removing these taxa, 1 16S samples was removed due to having total counts <= 1500.

```{r}
ps_phylum_filt = list('16S' = subset_taxa(ps$`16S`, !is.na(Phylum) & 
                                            !Phylum %in% c('', 'uncharacterized', 'unidentified') & 
                                            !Kingdom %in% c('d__Eukaryota')) %>% 
                        prune_samples(sample_sums(.) >= 1500, .) %>% prune_taxa(taxa_sums(.) > 0, .),
                      'ITS' = subset_taxa(ps$ITS, !is.na(Phylum) & 
                               !Phylum %in% c("", 'uncharacterized', 'unidentified')) %>% 
                        prune_samples(sample_sums(.) >= 1500, .) %>% prune_taxa(taxa_sums(.) > 0, .))
ps_phylum_filt_tax = ps_phylum_filt %>% map(get_tax)
ps_phylum_filt_counts = ps_phylum_filt %>% map(get_counts)
ps_phylum_filt_ra = map(ps_phylum_filt, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_phylum_filt
```
```{r}
ps_phylum_filt %>% imap(function(ps_phylum_filt, seq_type){
  ps_phylum_filt %>% get_tax() %>%
    summarize(across(-feature_id, ~sum(!is.na(.x))/ n())) %>%
    round(2) %>%
    pivot_longer(everything(), names_to = 'Taxonomic rank', values_to = seq_type)
  }) %>%
  reduce(full_join, by='Taxonomic rank') %>%
  kbl(format = 'html', caption='Fraction of ASVs classified at each rank') %>%
  kable_classic(full_width = F, html_font = "Times New Roman") 
```

```{r, fig.height=6}
ps_phylum_filt %>% imap(function(ps_phylum_filt, seq_type){
  ps_phylum_filt %>% sample_sums() %>% enframe(name = 'SampleID', value = 'total_counts') %>%
    left_join(metadata, by='SampleID') %>%
    mutate(seq_type = seq_type)
  }) %>%
  bind_rows() %>%
  ggplot(aes(x=condition, y=total_counts)) +
  geom_boxplot(outlier.shape = NA) +
  ggbeeswarm::geom_quasirandom(alpha = 0.3, width=0.2, groupOnX=TRUE) +
  facet_wrap(~seq_type, ncol=1, scales = 'free') +
  scale_y_continuous(labels = scales::comma) +
  labs(title='Total number of counts for each sample') +
  ggeasy::easy_rotate_x_labels() +
  ggeasy::easy_center_title()
```

<br><br><br>

Rarefying to 1569 counts.

```{r, warning=FALSE, message=FALSE}
ps_phylum_filt_rarefied =  ps_phylum_filt %>% map(
  ~rarefy_even_depth(.x, sample.size = 1569, rngseed=1100 , replace=FALSE) %>%
     prune_taxa(taxa_sums(.) > 0, .))
ps_phylum_filt_rarefied
```

<br><br><br><br><br><br>

```{r, fig.width=12}
prev = map(ps_phylum_filt_ra,
            function(ps_phylum_filt_ra){
              relative_counts = ps_phylum_filt_ra %>% get_counts()
              tax = ps_phylum_filt_ra %>% get_tax()
              relative_counts %>% 
                reframe(feature_id = feature_id,
                         prevalence = rowSums(.> 0) -1, #subtracted 1 because feature_id column is always counted
                         total_relative_abundance = taxa_sums(ps_phylum_filt_ra), 
                         Phylum = tax$Phylum,
                         Genus = tax$Genus,
                         Species = tax$Species)})
prev_plots = prev %>% imap(function(prev, seq_type){
  prev %>% 
    mutate(prevalence = prevalence / nsamples(ps_phylum_filt[[seq_type]])) %>%
  ggplot(aes(total_relative_abundance, prevalence, color=Phylum, text=glue('Genus: {Genus}<br>Species: {Species}'))) +
    geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) + 
    geom_point(size = 2, alpha = 0.6) +
    scale_x_log10() + xlab("Total Relative Abundance") + ylab("Prevalence [Fraction Samples]") +
    facet_wrap(~Phylum) +
    labs(title = glue('{seq_type} ASV Prevalence')) +
    theme_gray() +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(legend.position="none")})
```

```{r, out.width=12}
prev_plots$`16S` %>% plotly::ggplotly()
```
```{r, fig.width=12, fig.height=4}
prev_plots$ITS %>% plotly::ggplotly()
```
Predominant phyla
```{r}
ps_phyla = map(ps_phylum_filt, 
               function(ps_phylum_filt){
                 ps_phyla = tax_glom(ps_phylum_filt, "Phylum", NArm = TRUE)
                 prevalenceThreshold =  0.05 * nsamples(ps_phyla)
                 ps_phyla = ps_phyla %>% 
                   get_counts() %>% 
                   summarise(feature_id = feature_id,
                             prevalence = rowSums(.> 0),
                             TotalAbundance = taxa_sums(ps_phyla)) %>%
                   filter(prevalence >= prevalenceThreshold) %>% 
                   pull(feature_id) %>%
                   prune_taxa(., ps_phyla) %>%
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)
                 sample_data(ps_phyla) = sample_data(ps_phyla) %>% 
                   as('data.frame') %>%
                   mutate(TissueType = fct_relevel(TissueType, 'Ovule'))
                 return(ps_phyla)})
ps_phyla_ra = map(ps_phyla, ~transform_sample_counts(.x, function(x){x / sum(x)}))
prev_corn_phyla = map(ps_phyla_ra,
            function(ps_phyla_ra){
            relative_counts = ps_phyla_ra %>% get_counts()
              tax = ps_phyla_ra %>% get_tax()
              relative_counts %>% 
                pivot_longer(-feature_id, names_to = 'SampleID', values_to = 'counts') %>%
                left_join(metadata, by='SampleID') %>%
                left_join(tax, by='feature_id') %>%
                group_by(Phylum) %>%
                mutate(prevalence_entire_dataset = sum(counts > 0) / n(),
                       mean_relative_abundance_entire_dataset = mean(counts)) %>%
                group_by(Phylum, Corn_Genotype) %>%
                summarize(prevalence = sum(counts > 0) / n(),
                          mean_relative_abundance = mean(counts),
                          prevalence_entire_dataset = first(prevalence_entire_dataset),
                          mean_relative_abundance_entire_dataset = first(mean_relative_abundance_entire_dataset)) %>%
                ungroup()})
prev_corn_phyla$`16S` %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Phylum, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Phylum', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
prev_corn_phyla$ITS %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Phylum, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  #filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Phylum', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
```
<br><br><br> Removed taxa that are present in less than 5% of samples for ASV level dataset. This will be used for differential abundance testing at ASV level.
```{r}
ps_prevf = map2(ps_phylum_filt, prev,
               function(ps_phylum_filt, prev){
                 prevalenceThreshold =  0.05 * nsamples(ps_phylum_filt)
                 keepTaxa = filter(prev, prevalence >= prevalenceThreshold) %>% 
                   pull(feature_id)
                 prune_taxa(keepTaxa, ps_phylum_filt) %>%
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)})
ps_prevf_ra = map(ps_prevf, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_prevf_clr = map(ps_prevf, microbiome::transform, 'clr')
ps_prevf_alr = map(ps_prevf, microbiome::transform, 'alr', shift=1)
ps_prevf
```

<br><br><br> Agglomerated counts at both genus level and species level.

```{r}
ps_genus = map(ps_phylum_filt, 
               function(ps_phylum_filt){
                 ps_genus = tax_glom(ps_phylum_filt, "Genus", NArm = TRUE)
                 prevalenceThreshold =  0.05 * nsamples(ps_genus)
                 ps_genus = ps_genus %>% 
                   get_counts() %>% 
                   summarise(feature_id = feature_id,
                             prevalence = rowSums(.> 0),
                             TotalAbundance = taxa_sums(ps_genus)) %>%
                   filter(prevalence >= prevalenceThreshold) %>% 
                   pull(feature_id) %>%
                   prune_taxa(., ps_genus) %>%
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)
                 sample_data(ps_genus) = sample_data(ps_genus) %>% 
                   as('data.frame') %>%
                   mutate(TissueType = fct_relevel(TissueType, 'Ovule'))
                 return(ps_genus)})
ps_genus_ra = map(ps_genus, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_genus_clr = map(ps_genus, microbiome::transform, 'clr')
ps_genus_alr = map(ps_genus, microbiome::transform, 'alr', shift=1)
ps_species = map(ps_phylum_filt, 
               function(ps_phylum_filt){
                 ps_species = tax_glom(ps_phylum_filt, "Species", NArm = TRUE)
                 prevalenceThreshold =  0.05 * nsamples(ps_species)
                 ps_species = ps_species %>% 
                   get_counts() %>% 
                   summarise(feature_id = feature_id,
                             prevalence = rowSums(.> 0),
                             TotalAbundance = taxa_sums(ps_species)) %>%
                   filter(prevalence >= prevalenceThreshold) %>% 
                   pull(feature_id) %>%
                   prune_taxa(., ps_species) %>%
                   prune_samples(sample_sums(.) >= 500, .) %>% 
                   prune_taxa(taxa_sums(.) > 0, .)
                 sample_data(ps_species) = sample_data(ps_species) %>% 
                   as('data.frame') %>%
                   mutate(TissueType = fct_relevel(TissueType, 'Ovule'))
                 return(ps_species)})
ps_species_ra = map(ps_species, ~transform_sample_counts(.x, function(x){x / sum(x)}))
ps_species_clr = map(ps_species, microbiome::transform, 'clr')
ps_species_alr = map(ps_species, microbiome::transform, 'alr', shift=1)
```

Most prevalent genera
```{r}
prev_corn_genus = map(ps_genus_ra,
            function(ps_genus_ra){
            relative_counts = ps_genus_ra %>% get_counts()
              tax = ps_genus_ra %>% get_tax()
              relative_counts %>% 
                pivot_longer(-feature_id, names_to = 'SampleID', values_to = 'counts') %>%
                left_join(metadata, by='SampleID') %>%
                left_join(tax, by='feature_id') %>%
                group_by(Genus) %>%
                mutate(prevalence_entire_dataset = sum(counts > 0) / n(),
                       mean_relative_abundance_entire_dataset = mean(counts)) %>%
                group_by(Genus, Corn_Genotype) %>%
                summarize(prevalence = sum(counts > 0) / n(),
                          mean_relative_abundance = mean(counts),
                          prevalence_entire_dataset = first(prevalence_entire_dataset),
                          mean_relative_abundance_entire_dataset = first(mean_relative_abundance_entire_dataset)) %>%
                ungroup()})
prev_corn_genus$`16S` %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Genus, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Genus', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
prev_corn_genus$ITS %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Genus, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  #filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Genus', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
```
Most prevalent species
```{r}
prev_corn_species = map(ps_species_ra,
            function(ps_species_ra){
            relative_counts = ps_species_ra %>% get_counts()
              tax = ps_species_ra %>% get_tax()
              relative_counts %>% 
                pivot_longer(-feature_id, names_to = 'SampleID', values_to = 'counts') %>%
                left_join(metadata, by='SampleID') %>%
                left_join(tax, by='feature_id') %>%
                group_by(Species) %>%
                mutate(prevalence_entire_dataset = sum(counts > 0) / n(),
                       mean_relative_abundance_entire_dataset = mean(counts)) %>%
                group_by(Species, Corn_Genotype) %>%
                summarize(prevalence = sum(counts > 0) / n(),
                          mean_relative_abundance = mean(counts),
                          prevalence_entire_dataset = first(prevalence_entire_dataset),
                          mean_relative_abundance_entire_dataset = first(mean_relative_abundance_entire_dataset)) %>%
                ungroup()})
prev_corn_species$`16S` %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Species, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Species', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
prev_corn_species$ITS %>% 
  rename(total = mean_relative_abundance_entire_dataset) %>%
  distinct(Species, Corn_Genotype, mean_relative_abundance, total) %>%
  mutate(mean_relative_abundance= round(mean_relative_abundance, 3),
         total = round(total, 3)) %>%
  pivot_wider(names_from = Corn_Genotype, values_from = mean_relative_abundance) %>%
  filter(total >= 0.01) %>%
  arrange(desc(total)) %>%
  kbl(format = 'html', col.names = c('Species', 'Total Relative Abundance', 
                                     'Relative abundance B73', 'Relative abundance CML322')) %>%
  kable_classic(full_width = F, html_font = "Times New Roman")
```
```{r}
reads = list('16S' = read_qza(here('data', '16S', 'rep-seqs-filtered.qza'))$data,
             'ITS' = read_qza(here('data', 'ITS', 'rep-seqs-filtered.qza'))$data)
```

Below are barplots of relative taxon abundances for 16S sequencing with samples grouped according to similarity using the neatmap method. 

```{r, fig.width=12, fig.height=10, warning=FALSE, fig.show='hide'}
## https://github.com/google/palette.js/blob/79a703df344e3b24380ce1a211a2df7f2d90ca22/palette.js#L802
mpn65 = c('#ff0029','#377eb8','#66a61e','#984ea3','#00d2d5','#ff7f00','#af8d00','#7f80cd','#b3e900','#c42e60','#a65628',
         '#f781bf','#8dd3c7','#bebada','#fb8072','#80b1d3','#fdb462','#fccde5','#bc80bd','#ffed6f','#c4eaff','#cf8c00',
         '#1b9e77','#d95f02','#e7298a','#e6ab02','#a6761d','#0097ff','#00d067','#000000','#252525','#525252','#737373',
         '#969696','#bdbdbd','#f43600','#4ba93b','#5779bb','#927acc','#97ee3f','#bf3947','#9f5b00','#f48758','#8caed6',
         '#f2b94f','#eff26e','#e43872','#d9b100','#9d7a00','#698cff','#d9d9d9','#00d27e','#d06800','#009f82','#c49200',
         '#cbe8ff','#fecddf','#c27eb6','#8cd2ce','#c4b8d9','#f883b0','#a49100','#f48800','#27d0df','#a04a9b')
```

```{r, fig.width=12, fig.height=8}
map(rank_names(ps_genus$`16S`)[2:6], function(tax_rank){
  df = ps_genus$`16S` %>% 
    speedyseq::mutate_sample_data(condition = condition_w_rep) %>%
    transform(transform = "compositional") %>%
    aggregate_rare(level = tax_rank, detection = 0.05, prevalence = 0.05) 
  p = plot_composition(df, x.label='condition', otu.sort = 'abundance', sample.sort='neatmap') +
  guides(fill = guide_legend(ncol = 1)) +
  scale_fill_manual(values=mpn65) +
  theme_minimal() + 
  theme(axis.text.x=element_text(angle=90,hjust=0, vjust=0.5)) +
  labs(x = "Sample condition",
       y = "Relative abundance",
       title = glue("16S Relative abundance at {tax_rank} level"), 
       fill = tax_rank)
     p %>% plotly::ggplotly()
})
```


<br><br><br><br><br><br> The next two sets are done with ITS counts but made the same way as above.

```{r, fig.width=12, fig.height=8}
map(rank_names(ps_genus$ITS)[2:6], function(tax_rank){
  df = ps_genus$ITS %>% 
    speedyseq::mutate_sample_data(condition = condition_w_rep) %>%
    transform(transform = "compositional") %>%
    aggregate_rare(level = tax_rank, detection = 0.05, prevalence = 0.05) 
  p = plot_composition(df, x.label='condition', otu.sort = 'abundance', sample.sort='neatmap') +
  guides(fill = guide_legend(ncol = 1)) +
  scale_fill_manual(values=mpn65) +
  theme_minimal() + 
  theme(axis.text.x=element_text(angle=90,hjust=0, vjust=0.5)) +
  labs(x = "Sample condition",
       y = "Relative abundance",
       title = glue("ITS Relative abundance at {tax_rank} level"), 
       fill = tax_rank)
  p %>% plotly::ggplotly()
})
```


```{r, fig.width=15, fig.height=8, warning=FALSE}
prune_taxa(names(sort(taxa_sums(ps_genus_ra$`16S`),decreasing = TRUE)[1:30]), ps_genus_ra$`16S`) %>%
  speedyseq::plot_heatmap(method = "NMDS", distance = "bray", sample.label = 'condition', taxa.label='Genus') +
  theme(axis.text.x=element_text(angle=90,hjust=0, vjust=0.5, size = 10),
        strip.text.x = element_text(size = 16), plot.title = element_text(size=22)) +
  labs(title = '16S Heatmap of top 30 most abundant genera')
```

<br><br><br><br><br><br>

```{r, fig.width=15, fig.height=8, warning=FALSE}
prune_taxa(names(sort(taxa_sums(ps_genus_ra$ITS),decreasing = TRUE)[1:30]), ps_genus_ra$ITS) %>%
  speedyseq::plot_heatmap(method = "NMDS", distance = "bray", sample.label = 'condition', taxa.label='Genus') +
  theme(axis.text.x=element_text(angle=90,hjust=0, vjust=0.5, size = 10),
        strip.text.x = element_text(size = 16), plot.title = element_text(size=22)) +
  labs(title = 'ITS Heatmap of top 30 most abundant genera')
```
```{r}
save.image(here('src/16_and_ITS_import.RData'))
```

